home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
IRIX 6.5 Applications 2002 November
/
SGI IRIX 6.5 Applications 2002 November.iso
/
dev
/
insight_dev.idb
/
usr
/
share
/
Insight
/
bin
/
glossQA.z
/
glossQA
Wrap
Text File
|
2002-10-15
|
6KB
|
309 lines
#!/usr/bin/perl5
####################################################################
#
# Name: glossQA
#
# Note: PERL 5 or greater is required for this script.
#
# Function: Scan an SGML file to find all instances of glossary terms.
# Compare to determine if term exists in SGML; check global glossary
# if specified/available.
#
# Author: Ferg (gferg@sgi.com) / Adrian Daley (agd@sgi.com)
#
# Other Information:
#
# SGIDOCBK Glossary -
#
# <glossterm> tags also appear within the content of the book
#
# <glossentry>
# <glossterm></glossterm>
# <glossdef></glossdef>
# </glossentry>
#
#
# SGIDOC Glossary -
#
# <glossaryitem> occurs throughout the book
#
# <glossaryterm>
# <glossaryentry></glossaryentry>
# <glossarydef></glossarydef>
# </glossaryterm>
#
#
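# SGIDOC          SGIDOCBK
# -------------   --------------
# glossaryitem    glossterm    (reference within the book text)
# glossaryentry   glossterm    (the term inside a glossary entry)
# glossaryterm    glossentry   (container for one glossary entry)
# glossarydef     glossdef     (the definition)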
#
#
# Version 1.1 - 02Mar99
# Updated old code; ability to recognize/use sgidocbk or sgidoc
#
####################################################################
# ------------------------------------------------------------------
# Main program.
#
# Flow:
#   1. parse the command line;
#   2. collect referenced/defined terms from the input SGML file;
#   3. resolve references against the file's own glossary;
#   4. resolve what is left against the global glossary (unless it is
#      ignored or unreadable);
#   5. report the totals and any terms that are still undefined.
#
# The three term lists are package globals (this file does not use
# strict) because collectTerms() and checkTerms() read and write them
# by name.
# ------------------------------------------------------------------
my $fname     = '';     # SGML file to check
my $gg        = '';     # path of the global glossary ('' = none)
my $ignore_gg = 0;      # set to 1 by -ignore
my $_GG = '/hosts/bonnie.engr/depot/doc/1000/007-1859-060/gloss.sgm';

@ref_terms   = ();      # terms referenced in the book text
@def_terms   = ();      # terms defined inside a glossary
@unres_terms = ();      # referenced terms with no definition

# Check command-line args
#
# Test with defined() so that an argument of '0' or '' does not end
# option parsing prematurely.
my $arg;
while (defined($arg = shift(@ARGV))) {
    if ($arg =~ /^-h/) {
        usage('');
    } elsif ($arg =~ /^-g/) {
        # -g requires a following, non-empty path
        $gg = shift(@ARGV);
        if (!defined($gg) || $gg eq '') {
            usage('');
        }
    } elsif ($arg =~ /^-ignore/) {
        $ignore_gg = 1;
    } elsif (!($arg =~ /^-/)) {
        $fname = $arg;
    }
}

if ($fname eq '') {
    usage('');
} elsif (!(-r $fname)) {
    usage("glossQA: ERROR - cannot read $fname ($!)");
}

# Global glossary location; note that cmd line spec overrides env var,
# and the env var overrides the built-in default path.
#
if ($ignore_gg == 0) {
    if ($gg eq '') {
        # XXX: falling back to a hard-coded site path is fragile.
        $gg = $ENV{GLOBAL_GLOSS} ? $ENV{GLOBAL_GLOSS} : $_GG;
    }
    if (!(-r $gg)) {
        print STDOUT "\nglossQA: warning - cannot read from ",
                     "global glossary : ($gg)\n";
        # Clear the path so the global pass below is skipped.  Leaving
        # it set made collectTerms() abort through usage() before the
        # final report was printed, turning this warning into a fatal
        # error.
        $gg = '';
    }
}

# Collect up all terms; put in our available lists:
#
#   ref_terms = referenced terms
#   def_terms = defined terms
#
collectTerms($fname);
my $total_terms = scalar(@ref_terms);

# From here on only the file's base name is used in messages.
$fname =~ s#.*/##g;

if ($total_terms == 0) {
    print STDOUT "\nglossQA: No glossary terms were found in $fname\n";
    exit(0);
}

# See if there are any unresolved terms
#
checkTerms();
@ref_terms = ();
@def_terms = ();

# No unresolved terms, exit; otherwise print them out as warnings if we
# are processing the global glossary (otherwise they get printed 2X)
#
if (scalar(@unres_terms) == 0) {
    print STDOUT "\nglossQA: $total_terms glossary references found, ",
                 "0 undefined terms for $fname\n";
    exit(0);
} elsif ($ignore_gg == 0) {
    print STDOUT "\nglossQA: warning - cannot find term (locally): ",
                 join("\nglossQA: warning - cannot find term (locally): ",
                      @unres_terms), "\n";
}

# The locally unresolved terms become the references for the second pass.
@ref_terms = (@unres_terms);

# Now collect terms from the global glossary
#
if ($ignore_gg == 0 && $gg ne '') {
    print STDOUT "\nglossQA: checking global glossary ($gg)\n";
    collectTerms($gg);
    checkTerms();
} else {
    print STDOUT "\nglossQA: warning - ignoring the global glossary\n";
}

# Report to user
#
print STDOUT "\nglossQA: $total_terms glossary references found, ",
             scalar(@unres_terms), " undefined terms for $fname\n";
if (scalar(@unres_terms) > 0) {
    print STDOUT "\nglossQA: error - cannot find term: ",
                 join("\nglossQA: error - cannot find term: ", @unres_terms),
                 "\n";
}
exit(0);
### END MAIN PROGRAM ###
####################################################################
#
# void collectTerms(string $input_file)
#
# given an input file, collect up all referenced glossary
# terms AND any defined entries
#
# ref_terms = referenced terms
# def_terms = defined terms
#
####################################################################
sub collectTerms {
    # Scan one SGML file line by line and append every new glossary
    # term to the package-global lists:
    #
    #   @ref_terms - terms seen before the first <glossary> tag
    #   @def_terms - terms seen on or after the line carrying <glossary>
    #
    # Parameters:
    #   $fname - path of the SGML file to read
    #
    # Terms already present in the relevant list are not added again.
    # If the file cannot be opened, usage() is called with an error
    # message (usage() terminates the program).
    #
    # NOTE(review): the glossary start is detected only as a bare
    # <glossary> tag (case-insensitive); a tag carrying attributes is
    # not recognised - confirm whether that is intended.
    my($fname) = @_;
    my($pat)   = 'GLOSSARYITEM|GLOSSTERM|GLOSSARYENTRY';
    my($g_pat) = 'GLOSSARY';
    my($found_gloss) = 0;

    # Index the terms collected so far so each duplicate check is a
    # hash lookup instead of a scan of the whole list.  Hash keys
    # compare by exact string equality, the same test as the eq
    # comparison this replaces (grep() was avoided because of
    # international character sets).
    my(%seen_ref, %seen_def);
    @seen_ref{@ref_terms} = ();
    @seen_def{@def_terms} = ();

    # Three-argument open with a lexical handle (Perl 5.6+): the file
    # name is never interpreted as an open mode or a pipe, and no
    # global handle is left behind.
    open(my $fh, '<', $fname)
        or usage("glossQA: ERROR - cannot open $fname ($!)");

    # Read into a lexical so the caller's $_ is left untouched.
    while (defined(my $line = <$fh>)) {
        if ($found_gloss == 0 && $line =~ /<$g_pat>/i) {
            $found_gloss = 1;
        }

        # Look for terms within book for both DTD types.
        #
        # Each matched element is replaced by its bare tag name, so the
        # loop makes progress and finds every term on the line.
        while ($line =~ s/<($pat)[^>]*>(.*?)<\/($pat)>/$1/i) {
            my $val = $2;

            # Legacy escaping of the first backslash and the first
            # pair of parentheses; kept so the collected and reported
            # term text stays exactly as before.
            $val =~ s/\\/\\\\/;
            $val =~ s/\(/\\\(/;
            $val =~ s/\)/\\\)/;

            # Drop SGML entity references such as &amp;
            $val =~ s/\&\w+\;//g;

            # remove extra whitespace
            $val =~ s/^\s+|\s+$//g;

            next if ($val eq '');

            # We know whether or not we are inside the <glossary>,
            # and we push the term onto the appropriate list
            if ($found_gloss == 0) {
                next if exists($seen_ref{$val});
                $seen_ref{$val} = 1;
                push(@ref_terms, $val);
            } else {
                next if exists($seen_def{$val});
                $seen_def{$val} = 1;
                push(@def_terms, $val);
            }
        }
    }
    close($fh);
}
####################################################################
#
# void checkTerms()
#
# Look at all referenced terms, see if available in def terms,
# if not, then push onto our list of unresolved terms
#
####################################################################
sub checkTerms {
    # Rebuild the package-global @unres_terms as the list of entries
    # in @ref_terms that have no exact match in @def_terms, keeping
    # the order of @ref_terms.  Empty strings are never reported.
    #
    # Takes no arguments; reads @ref_terms and @def_terms and leaves
    # both unmodified.  (The previous loop blanked resolved entries of
    # @ref_terms through the foreach alias.)

    # Index the defined terms once.  Hash keys compare by exact string
    # equality, the same test as the eq comparison this replaces
    # (grep() was avoided because of international character sets).
    my %is_defined;
    @is_defined{@def_terms} = ();

    @unres_terms = ();
    foreach my $term (@ref_terms) {
        next if ($term eq '');
        next if exists($is_defined{$term});
        push(@unres_terms, $term);
    }
}
####################################################################
#
# Prints the program's usage statement and exits.
# Pass in a msg string to print out, in event of error
#
####################################################################
sub usage {
    # Print the usage statement on STDOUT and terminate the program.
    #
    # Parameters:
    #   $msg - optional error text; '' (or undef) means plain help.
    #
    # When $msg is non-empty it is also written to STDERR and the
    # process exits with status 1, so callers such as make or a shell
    # script can detect the failure.  With no message the exit status
    # is 0.  This sub never returns.
    my($msg) = @_;
    $msg = '' unless defined($msg);

    # Show only the base name of the script in the usage text.
    my($name) = $0;
    $name =~ s#.*/##g;

    print <<END_USAGE;
$name Version 1.1
Usage: $name [-h] [-g <global_gloss> | -ignore] [<input_file>]
-h Print this help message
-g <global_gloss> Name/path to global glossary SGML file
-ignore Ignore the global glossary file (default is to use one)
<input_file> SGML file to check
END_USAGE

    if ($msg ne '') {
        print STDERR "\n$msg\n";
        exit(1);
    }
    exit(0);
}